library(tidyverse)
library(kableExtra)
library(viridis)
library(ggExtra)
library(gghighlight)
library(ggridges)
library(gganimate)
library(gifski)
library(png)df <- readr::read_csv("merged.csv")## Parsed with column specification:
## cols(
## player = col_character(),
## year = col_double(),
## age = col_double(),
## ranking = col_double(),
## points = col_double(),
## tourn_played = col_double(),
## matches = col_double(),
## surface = col_character(),
## percentage_service = col_double(),
## percentage_return = col_double(),
## games_won_service = col_double(),
## games_total_service = col_double(),
## games_won_return = col_double(),
## games_total_return = col_double()
## )
#ggplotの文字化け対策
theme_set(theme_bw(base_family = "HiraKakuProN-W3"))df %>% dplyr::glimpse()## Observations: 8,678
## Variables: 14
## $ player <chr> "Pete Sampras", "Michael Stich", "Guy Forge…
## $ year <dbl> 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1…
## $ age <dbl> 20, 23, 26, 25, 21, 31, 24, 20, 20, 27, 22,…
## $ ranking <dbl> 6, 4, 7, 1, 2, 5, 3, 45, 16, 20, 50, 69, 26…
## $ points <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tourn_played <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ matches <dbl> 71, 99, 84, 93, 78, 73, 62, 46, 64, 77, 55,…
## $ surface <chr> "All", "All", "All", "All", "All", "All", "…
## $ percentage_service <dbl> 87.39, 87.26, 86.15, 85.36, 84.53, 84.48, 8…
## $ percentage_return <dbl> 26.00, 24.10, 24.56, 33.33, 29.52, 29.44, 2…
## $ games_won_service <dbl> 721, 925, 784, 892, 754, 773, 668, 496, 559…
## $ games_total_service <dbl> 825, 1060, 910, 1045, 892, 915, 792, 590, 6…
## $ games_won_return <dbl> 209, 255, 223, 348, 258, 272, 215, 122, 146…
## $ games_total_return <dbl> 804, 1058, 908, 1044, 874, 924, 797, 587, 6…
df %>% summary()## player year age ranking
## Length:8678 Min. :1991 Min. :17.00 Min. : 1
## Class :character 1st Qu.:1997 1st Qu.:23.00 1st Qu.: 17
## Mode :character Median :2004 Median :26.00 Median : 36
## Mean :2004 Mean :25.93 Mean : 40
## 3rd Qu.:2011 3rd Qu.:28.00 3rd Qu.: 59
## Max. :2018 Max. :40.00 Max. :100
## NA's :1790 NA's :1790
## points tourn_played matches surface
## Min. : 0 Min. : 0.0 Min. : 2.0 Length:8678
## 1st Qu.: 565 1st Qu.:20.0 1st Qu.: 5.0 Class :character
## Median : 900 Median :24.0 Median : 16.0 Mode :character
## Mean : 1278 Mean :20.7 Mean : 23.1
## 3rd Qu.: 1475 3rd Qu.:27.0 3rd Qu.: 35.0
## Max. :16585 Max. :38.0 Max. :105.0
## NA's :1790 NA's :1790
## percentage_service percentage_return games_won_service
## Min. : 0.00 Min. : 0.00 Min. : 0.0
## 1st Qu.: 73.67 1st Qu.:17.78 1st Qu.: 58.0
## Median : 78.74 Median :22.86 Median : 141.0
## Mean : 77.76 Mean :22.25 Mean : 217.9
## 3rd Qu.: 83.49 3rd Qu.:27.27 3rd Qu.: 329.0
## Max. :100.00 Max. :51.03 Max. :1108.0
##
## games_total_service games_won_return games_total_return
## Min. : 0.0 Min. : 0.00 Min. : 0.00
## 1st Qu.: 73.0 1st Qu.: 13.00 1st Qu.: 72.25
## Median : 182.0 Median : 46.00 Median : 180.00
## Mean : 274.5 Mean : 67.45 Mean : 273.64
## 3rd Qu.: 413.0 3rd Qu.:104.00 3rd Qu.: 412.00
## Max. :1229.0 Max. :431.00 Max. :1224.00
##
df %>%
dplyr::filter(year == 2018 & surface == "All") %>%
dplyr::arrange(ranking) %>%
kableExtra::kable() %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"), full_width = F)| player | year | age | ranking | points | tourn_played | matches | surface | percentage_service | percentage_return | games_won_service | games_total_service | games_won_return | games_total_return |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Novak Djokovic | 2018 | 31 | 1 | 9045 | 17 | 65 | All | 87.23 | 30.26 | 724 | 830 | 246 | 813 |
| Rafael Nadal | 2018 | 32 | 2 | 7480 | 13 | 47 | All | 86.44 | 36.55 | 529 | 612 | 216 | 591 |
| Roger Federer | 2018 | 37 | 3 | 6420 | 17 | 58 | All | 91.10 | 23.89 | 686 | 753 | 178 | 745 |
| Alexander Zverev | 2018 | 21 | 4 | 6385 | 21 | 73 | All | 82.95 | 27.60 | 754 | 909 | 252 | 913 |
| Juan Martin del Potro | 2018 | 30 | 5 | 5300 | 18 | 60 | All | 87.61 | 25.20 | 672 | 767 | 192 | 762 |
| Kevin Anderson | 2018 | 32 | 6 | 4710 | 20 | 66 | All | 89.06 | 15.95 | 847 | 951 | 149 | 934 |
| Marin Cilic | 2018 | 30 | 7 | 4250 | 19 | 58 | All | 86.99 | 22.79 | 709 | 815 | 183 | 803 |
| Dominic Thiem | 2018 | 25 | 8 | 4095 | 25 | 70 | All | 85.18 | 24.44 | 753 | 884 | 217 | 888 |
| Kei Nishikori | 2018 | 29 | 9 | 3590 | 24 | 64 | All | 81.38 | 24.46 | 638 | 784 | 191 | 781 |
| John Isner | 2018 | 33 | 10 | 3155 | 23 | 54 | All | 93.60 | 9.42 | 805 | 860 | 80 | 849 |
| Karen Khachanov | 2018 | 22 | 11 | 2835 | 25 | 66 | All | 86.40 | 23.00 | 724 | 838 | 193 | 839 |
| Borna Coric | 2018 | 22 | 12 | 2480 | 20 | 54 | All | 83.54 | 26.14 | 528 | 632 | 166 | 635 |
| Fabio Fognini | 2018 | 31 | 13 | 2315 | 25 | 64 | All | 77.26 | 29.43 | 615 | 796 | 236 | 802 |
| Kyle Edmund | 2018 | 23 | 14 | 2150 | 23 | 57 | All | 84.40 | 21.95 | 622 | 737 | 162 | 738 |
| Stefanos Tsitsipas | 2018 | 20 | 15 | 2095 | 30 | 73 | All | 85.20 | 19.69 | 783 | 919 | 177 | 899 |
| Daniil Medvedev | 2018 | 22 | 16 | 1977 | 27 | 64 | All | 81.02 | 23.14 | 619 | 764 | 177 | 765 |
| Diego Schwartzman | 2018 | 26 | 17 | 1880 | 26 | 55 | All | 74.10 | 30.67 | 492 | 664 | 207 | 675 |
| Milos Raonic | 2018 | 28 | 18 | 1855 | 20 | 45 | All | 90.80 | 15.54 | 533 | 587 | 90 | 579 |
| Marco Cecchinato | 2018 | 26 | 20 | 1819 | 30 | 46 | All | 78.20 | 18.92 | 470 | 601 | 112 | 592 |
| Nikoloz Basilashvili | 2018 | 26 | 21 | 1795 | 29 | 55 | All | 75.92 | 20.72 | 514 | 677 | 139 | 671 |
| Pablo Carreno Busta | 2018 | 27 | 23 | 1705 | 22 | 52 | All | 78.23 | 26.17 | 503 | 643 | 168 | 642 |
| Roberto Bautista Agut | 2018 | 30 | 24 | 1605 | 24 | 51 | All | 80.10 | 26.04 | 499 | 623 | 157 | 603 |
| Hyeon Chung | 2018 | 22 | 25 | 1585 | 23 | 47 | All | 78.28 | 28.24 | 429 | 548 | 157 | 556 |
| Richard Gasquet | 2018 | 32 | 26 | 1535 | 25 | 56 | All | 78.97 | 27.01 | 492 | 623 | 168 | 622 |
| Denis Shapovalov | 2018 | 19 | 27 | 1440 | 27 | 60 | All | 82.17 | 19.40 | 659 | 802 | 154 | 794 |
| Fernando Verdasco | 2018 | 35 | 28 | 1410 | 28 | 61 | All | 79.09 | 24.81 | 609 | 770 | 193 | 778 |
| Gael Monfils | 2018 | 32 | 29 | 1400 | 24 | 49 | All | 80.65 | 23.07 | 525 | 651 | 149 | 646 |
| Gilles Simon | 2018 | 34 | 30 | 1370 | 28 | 56 | All | 77.48 | 28.40 | 523 | 675 | 190 | 669 |
| Alex de Minaur | 2018 | 19 | 31 | 1298 | 26 | 48 | All | 80.07 | 23.83 | 478 | 597 | 143 | 600 |
| Steve Johnson | 2018 | 29 | 33 | 1190 | 25 | 49 | All | 85.81 | 17.64 | 520 | 606 | 106 | 601 |
| Philipp Kohlschreiber | 2018 | 35 | 34 | 1125 | 23 | 46 | All | 82.09 | 19.60 | 486 | 592 | 119 | 607 |
| Marton Fucsovics | 2018 | 26 | 36 | 1122 | 27 | 47 | All | 76.98 | 26.83 | 438 | 569 | 154 | 574 |
| Andreas Seppi | 2018 | 34 | 37 | 1106 | 24 | 45 | All | 79.03 | 22.82 | 471 | 596 | 136 | 596 |
| Frances Tiafoe | 2018 | 20 | 39 | 1080 | 25 | 52 | All | 81.76 | 17.98 | 538 | 658 | 119 | 662 |
| Jeremy Chardy | 2018 | 31 | 40 | 1050 | 25 | 45 | All | 82.69 | 16.84 | 473 | 572 | 95 | 564 |
| Adrian Mannarino | 2018 | 30 | 42 | 1045 | 29 | 54 | All | 75.82 | 19.26 | 511 | 674 | 131 | 680 |
| Nicolas Jarry | 2018 | 23 | 43 | 1022 | 22 | 45 | All | 82.97 | 16.59 | 526 | 634 | 105 | 633 |
| Joao Sousa | 2018 | 29 | 44 | 1017 | 29 | 51 | All | 76.79 | 22.74 | 526 | 685 | 156 | 686 |
| Damir Dzumhur | 2018 | 26 | 47 | 985 | 31 | 54 | All | 70.27 | 26.22 | 442 | 629 | 167 | 637 |
| Dusan Lajovic | 2018 | 28 | 48 | 985 | 25 | 46 | All | 77.98 | 22.99 | 464 | 595 | 137 | 596 |
| Robin Haase | 2018 | 31 | 50 | 965 | 28 | 52 | All | 76.94 | 21.16 | 544 | 707 | 150 | 709 |
| Benoit Paire | 2018 | 29 | 52 | 935 | 29 | 54 | All | 73.40 | 23.95 | 505 | 688 | 165 | 689 |
| Pierre-Hugues Herbert | 2018 | 27 | 55 | 903 | 25 | 45 | All | 82.79 | 16.78 | 505 | 610 | 101 | 602 |
| Leonardo Mayer | 2018 | 31 | 56 | 895 | 25 | 47 | All | 82.06 | 18.01 | 485 | 591 | 105 | 583 |
| Jan-Lennard Struff | 2018 | 28 | 57 | 875 | 25 | 47 | All | 80.74 | 18.91 | 499 | 618 | 118 | 624 |
| Guido Pella | 2018 | 28 | 58 | 860 | 25 | 45 | All | 78.61 | 21.88 | 463 | 589 | 128 | 585 |
| Peter Gojowczyk | 2018 | 29 | 59 | 855 | 25 | 48 | All | 79.57 | 17.74 | 444 | 558 | 99 | 558 |
| Albert Ramos-Vinolas | 2018 | 30 | 65 | 790 | 29 | 46 | All | 75.44 | 17.05 | 424 | 562 | 97 | 569 |
| Mischa Zverev | 2018 | 31 | 69 | 750 | 31 | 48 | All | 76.79 | 20.18 | 430 | 560 | 115 | 570 |
plot_tmp <- function(data){
data %>%
ggplot(aes(x=percentage_return, y=percentage_service, color=ranking, size=matches)) +
geom_point() +
scale_color_viridis() +
xlim(0, 55) +
ylim(55, 100) +
xlab("break[%]") +
ylab("keep[%]")
}
df %>%
dplyr::filter(ranking <= 50 & matches >= 5) %>%
split(.$surface) %>%
purrr::map(~ plot_tmp(.x))## $All
##
## $Clay
##
## $Grass
##
## $Hard
ggExtra::ggMarginal()でヒストグラムも一緒に
(purrr::map()がうまく動かなかったのでsurfaceごとにコード書いた…)
All
df %>%
dplyr::filter(ranking <= 50 & matches >= 5 & surface == "All") %>%
ggplot(aes(x=percentage_return, y=percentage_service, color=ranking, size=matches)) +
geom_point() +
scale_color_viridis() +
xlab("break[%]") +
ylab("keep[%]") +
theme(legend.position = "bottom") -> p
p %>% ggExtra::ggMarginal(type = "histogram", fill = "blue")df %>%
dplyr::filter(ranking <= 50 & matches >= 5 & surface == "Clay") %>%
ggplot(aes(x=percentage_return, y=percentage_service, color=ranking, size=matches)) +
geom_point() +
scale_color_viridis() +
xlab("break[%]") +
ylab("keep[%]") +
theme(legend.position = "bottom") -> p
p %>% ggExtra::ggMarginal(type = "histogram", fill = "blue")df %>%
dplyr::filter(ranking <= 50 & matches >= 5 & surface == "Grass") %>%
ggplot(aes(x=percentage_return, y=percentage_service, color=ranking, size=matches)) +
geom_point() +
scale_color_viridis() +
xlab("break[%]") +
ylab("keep[%]") +
theme(legend.position = "bottom") -> p
p %>% ggExtra::ggMarginal(type = "histogram", fill = "blue")df %>%
dplyr::filter(ranking <= 50 & matches >= 5 & surface == "Hard") %>%
ggplot(aes(x=percentage_return, y=percentage_service, color=ranking, size=matches)) +
geom_point() +
scale_color_viridis() +
xlab("break[%]") +
ylab("keep[%]") +
theme(legend.position = "bottom") -> p
p %>% ggExtra::ggMarginal(type = "histogram", fill = "blue")df %>%
dplyr::filter(ranking <= 50 & matches >= 5) %>%
ggplot(aes(x = percentage_service, y = as.factor(surface), fill = ..x..)) +
geom_density_ridges_gradient(scale = 1, rel_min_height = 0.01) +
scale_fill_viridis(name = "keep[%]", option = "C") +
xlab("keep[%]") +
ylab("surface")## Picking joint bandwidth of 1.18
df %>%
dplyr::filter(ranking <= 50 & matches >= 5) %>%
ggplot(aes(x = percentage_return, y = as.factor(surface), fill = ..x..)) +
geom_density_ridges_gradient(scale = 1, rel_min_height = 0.01) +
scale_fill_viridis(name = "break[%]", option = "C") +
xlab("break[%]") +
ylab("surface")## Picking joint bandwidth of 1.19
plot_tmp <- function(data){
data %>%
ggplot(aes(x=percentage_return, y=percentage_service, color=ranking, size=matches)) +
geom_point() +
scale_color_viridis() +
xlim(0, 55) +
ylim(55, 100) +
xlab("break[%]") +
ylab("keep[%]")
}
df %>%
dplyr::filter(ranking <= 10 & matches >= 5) %>%
split(.$surface) %>%
purrr::map(~ plot_tmp(.x))## $All
##
## $Clay
##
## $Grass
##
## $Hard
ggExtra::ggMarginal()でヒストグラムも一緒に
(Top50と同様、purrr::map()がうまく動かなかったのでsurfaceごとにコード書いた…)
All
df %>%
dplyr::filter(ranking <= 10 & matches >= 5 & surface == "All") %>%
ggplot(aes(x=percentage_return, y=percentage_service, color=ranking, size=matches)) +
geom_point() +
scale_color_viridis() +
xlab("break[%]") +
ylab("keep[%]") +
theme(legend.position = "bottom") -> p
p %>% ggExtra::ggMarginal(type = "histogram", fill = "blue")df %>%
dplyr::filter(ranking <= 10 & matches >= 5 & surface == "Clay") %>%
ggplot(aes(x=percentage_return, y=percentage_service, color=ranking, size=matches)) +
geom_point() +
scale_color_viridis() +
xlab("break[%]") +
ylab("keep[%]") +
theme(legend.position = "bottom") -> p
p %>% ggExtra::ggMarginal(type = "histogram", fill = "blue")df %>%
dplyr::filter(ranking <= 10 & matches >= 5 & surface == "Grass") %>%
ggplot(aes(x=percentage_return, y=percentage_service, color=ranking, size=matches)) +
geom_point() +
scale_color_viridis() +
xlab("break[%]") +
ylab("keep[%]") +
theme(legend.position = "bottom") -> p
p %>% ggExtra::ggMarginal(type = "histogram", fill = "blue")df %>%
dplyr::filter(ranking <= 10 & matches >= 5 & surface == "Hard") %>%
ggplot(aes(x=percentage_return, y=percentage_service, color=ranking, size=matches)) +
geom_point() +
scale_color_viridis() +
xlab("break[%]") +
ylab("keep[%]") +
theme(legend.position = "bottom") -> p
p %>% ggExtra::ggMarginal(type = "histogram", fill = "blue")player_list <- c("Roger Federer", "Rafael Nadal", "Novak Djokovic", "Andy Murray" , "others(Top10)")
color_list <- c("Roger Federer" = "#ffbb00", "Rafael Nadal" = "#fb6542", "Novak Djokovic" = "#375e97", "Andy Murray" = "#3f671c", "others(Top10)" = "gray80")
plot_tmp <- function(data){
data %>%
dplyr::mutate(player = ifelse(player %in% player_list, player, "others(Top10)")) %>%
transform(player = factor(player, levels = player_list)) %>%
ggplot(aes(x=percentage_return, y=percentage_service, color=player, size=matches)) +
geom_point(alpha = 0.6) +
scale_color_manual(values = color_list) +
xlab("break[%]") +
ylab("keep[%]")
}
df %>%
dplyr::filter(ranking <= 10 & matches >= 5) %>%
split(.$surface) %>%
purrr::map(~ plot_tmp(.x))## $All
##
## $Clay
##
## $Grass
##
## $Hard
plot_tmp <- function(data){
data %>%
ggplot(aes(x=percentage_return, y=percentage_service, color=year, size=matches)) +
geom_point() +
scale_color_viridis() +
gghighlight(player == "Kei Nishikori") +
xlab("break[%]") +
ylab("keep[%]")
}
df %>%
dplyr::filter(ranking <= 50 & matches >= 5) %>%
split(.$surface) %>%
purrr::map(~ plot_tmp(.x))## $All
##
## $Clay
##
## $Grass
##
## $Hard
df %>%
dplyr::filter(ranking <= 50 & matches >= 5 & surface == "All") %>%
ggplot(aes(x=percentage_return, y=percentage_service, color=ranking, size=matches)) +
geom_point() +
scale_color_viridis() +
labs(title = 'year: {frame_time}', x = 'break[%]', y = 'keep[%]') +
transition_time(as.integer(year)) +
ease_aes('linear')微妙な変化だが、徐々にサービス優位になってる様子
keep[%]
df %>%
dplyr::filter(ranking <= 50 & matches >= 5 & surface == "All") %>%
ggplot(aes(x = percentage_service, y = as.factor(year), fill = ..x..)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.05) +
scale_fill_viridis(name = "keep[%]", option = "C") +
xlab("keep[%]") +
ylab("year") +
labs(title = 'keep percentage of Top50 players')## Picking joint bandwidth of 1.88
df %>%
dplyr::filter(ranking <= 50 & matches >= 5 & surface == "All") %>%
ggplot(aes(x = percentage_return, y = as.factor(year), fill = ..x..)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.05) +
scale_fill_viridis(name = "break[%]", option = "C") +
xlab("break[%]") +
ylab("year") +
labs(title = 'break percentage of Top50 players')## Picking joint bandwidth of 1.75
2017年にTop10選手の顔ぶれが若返ったかに思えたが2018年は元に戻った模様.
Top50 players
df %>%
dplyr::filter(ranking <= 50 & matches >= 5 & surface == "All") %>%
ggplot(aes(x = age, y = as.factor(year), fill = ..x..)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.05) +
scale_fill_viridis(name = "age", option = "C") +
xlab("age") +
ylab("year") +
labs(title = 'Age of Top50 players')## Picking joint bandwidth of 1.3
df %>%
dplyr::filter(ranking <= 10 & matches >= 5 & surface == "All") %>%
ggplot(aes(x = age, y = as.factor(year), fill = ..x..)) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.05) +
scale_fill_viridis(name = "age", option = "C") +
xlab("age") +
ylab("year") +
labs(title = 'Age of Top10 players')## Picking joint bandwidth of 1.34